When NOT considering restroom access,
# Convert binary variable to factors
subway_cleaned <- subway_cleaned %>%
mutate(
ada <- factor(ada, levels = c("FALSE", "TRUE")),
free_crossover <- factor(free_crossover, levels = c("FALSE", "TRUE"))
)
# Select the Variables for Clustering
clustering_data <- subway_cleaned %>%
dplyr::select(entrance_type, staffing, ada, free_crossover, station_latitude, station_longitude, station_name)
set.seed(123) # For reproducibility
km_result <- clustering_data %>%
dplyr::select(-station_latitude, -station_longitude, -station_name) %>%
kmodes(modes = 3, iter.max = 10)
clustering_data$.cluster <- factor(km_result$cluster)
# Define a Mode function
Mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
cluster_profiles <- clustering_data %>%
group_by(.cluster) %>%
summarise(across(everything(), ~ Mode(.x)))
knitr::kable(cluster_profiles)
| 1 |
Stair |
FULL |
FALSE |
TRUE |
40.64136 |
-74.01788 |
59th St |
| 2 |
Stair |
FULL |
FALSE |
TRUE |
40.66040 |
-73.99809 |
25th St |
| 3 |
Stair |
FULL |
FALSE |
TRUE |
40.68367 |
-73.97881 |
36th St |
clustering_data <- clustering_data %>%
mutate(
accessibility_level = case_when(
.cluster == 1 ~ "High Accessibility",
.cluster == 2 ~ "Medium Accessibility",
.cluster == 3 ~ "Low Accessibility"
)
)
pal <- leaflet::colorFactor(
palette = c("chartreuse", "darkgoldenrod1", "brown2"), # Adjust colors as needed
domain = clustering_data$accessibility_level
)
leaflet() |>
addTiles() |>
addCircleMarkers(data = clustering_data,
lng = ~station_longitude,
lat = ~station_latitude,
label = ~station_name,
radius = 3,
color = NA,
# color = ~pal(accessibility_level),
fillColor = ~pal(accessibility_level),
stroke = TRUE, fillOpacity = 0.75,
popup = ~paste("Ada:", ada,
"<br> Staffing:", staffing,
"<br> Entrance type:", entrance_type,
"<br> Free crossover:", free_crossover)) |>
addProviderTiles(providers$CartoDB.Positron) |>
addLegend(
"bottomright",
pal = pal,
values = clustering_data$accessibility_level,
title = "Accessibility Level",
opacity = 1
)
When considering restroom access,
subway_with_restroom = read_csv("Data/cleaned_subway_restroom_data.csv")
subway_with_restroom2 <- subway_with_restroom %>%
mutate(
#convert to logical
restroom_changing_stations_logic = as.logical(restroom_changing_stations),
restroom_status_logic = as.logical(restroom_status),
restroom_accessibility = fct_explicit_na(restroom_accessibility, na_level = "Unknown"),
restroom_open = fct_explicit_na(restroom_open, na_level = "Unknown")
)
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `restroom_accessibility =
## fct_explicit_na(restroom_accessibility, na_level = "Unknown")`.
## Caused by warning:
## ! `fct_explicit_na()` was deprecated in forcats 1.0.0.
## ℹ Please use `fct_na_value_to_level()` instead.
# Select the Variables for Clustering
clustering_merged_data <- subway_with_restroom2 %>%
dplyr::select(entrance_type, staffing, ada, free_crossover, station_latitude, station_longitude, station_name
,restroom_open, restroom_accessibility, restroom_changing_stations_logic, restroom_status_logic)
set.seed(123) # For reproducibility
km_result2 <- clustering_merged_data %>%
dplyr::select(-station_latitude, -station_longitude, -station_name) %>%
klaR::kmodes(modes = 3, iter.max = 10)
clustering_merged_data$.cluster <- factor(km_result2$cluster)
# Define a Mode function
Mode_for_merged <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
cluster_merged_profiles <- clustering_merged_data %>%
group_by(.cluster) %>%
dplyr::select(-station_latitude, -station_longitude) %>%
dplyr::select(station_name, everything()) %>%
summarise(across(everything(), ~ Mode_for_merged(.x)))
knitr::kable(cluster_merged_profiles)
| 1 |
36th St |
Stair |
FULL |
FALSE |
TRUE |
Year Round |
Fully Accessible |
FALSE |
TRUE |
| 2 |
45th St |
Stair |
FULL |
FALSE |
TRUE |
Year Round |
Unknown |
FALSE |
TRUE |
| 3 |
25th St |
Stair |
FULL |
FALSE |
FALSE |
Year Round |
Not Accessible |
FALSE |
TRUE |
clustering_merged_data <- clustering_merged_data %>%
mutate(
accessibility_level = case_when(
.cluster == 1 ~ "High Accessibility",
.cluster == 2 ~ "Medium Accessibility",
.cluster == 3 ~ "Low Accessibility"
)
)
pal <- leaflet::colorFactor(
palette = c("chartreuse", "darkgoldenrod1", "brown2"), # Adjust colors as needed
domain = clustering_merged_data$accessibility_level
)
leaflet() |>
addTiles() |>
addCircleMarkers(data = clustering_merged_data,
lng = ~station_longitude,
lat = ~station_latitude,
label = ~station_name,
radius = 3,
color = NA,
# color = ~pal(accessibility_level),
fillColor = ~pal(accessibility_level),
stroke = TRUE, fillOpacity = 0.75,
popup = ~paste("Ada:", ada,
"<br> Staffing:", staffing,
"<br> Entrance type:", entrance_type,
"<br> Free crossover:", free_crossover)) |>
addProviderTiles(providers$CartoDB.Positron) |>
addLegend(
"bottomright",
pal = pal,
values = clustering_merged_data$accessibility_level,
title = "Accessibility Level",
opacity = 1
)